Chapter 6 Community composition

# Load the objects saved in data/data.Rdata into the current environment.
# NOTE(review): the code below uses genome_counts_filt, genome_metadata,
# sample_metadata and phylum_colors without defining them, so they are
# presumably provided by this file -- confirm.
load("data/data.Rdata")

6.1 Taxonomy overview

6.1.1 Stacked barplot

# Strip the leading "p__" rank prefix from the phylum labels
genome_metadata <- genome_metadata %>%
  mutate(phylum = str_remove_all(phylum, "^p__"))

# Stacked barplot of genome relative abundances per sample, filled by phylum
genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide every non-genome column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) + # grouping enables keeping the same sorting of taxonomic units
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) + # plot stacked bars with white borders
  scale_fill_manual(values = phylum_colors) +
  facet_grid(~region, scales = "free", space = "free") + # argument spelled out in full instead of relying on partial matching
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

6.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample (one row per sample-phylum pair)
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide every non-genome column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count), .groups = "drop") # return an ungrouped table instead of one still grouped by sample

# Mean and standard deviation of relative abundance per phylum, highest mean first
phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
  arrange(-mean) %>%
  tt()
tinytable_03ps73v1s5wneur56yo8
phylum mean sd
Bacteroidota 0.4035989436 0.145467610
Fusobacteriota 0.2524757044 0.116738961
Bacillota_A 0.1569779500 0.083006580
Pseudomonadota 0.1102730723 0.091046930
Bacillota 0.0325629996 0.047546126
Bacillota_C 0.0258155713 0.030445893
Campylobacterota 0.0053179332 0.008379673
Actinomycetota 0.0052739897 0.007053559
Deferribacterota 0.0040560203 0.006214013
Bacillota_B 0.0023438584 0.006878646
Spirochaetota 0.0009585895 0.002398975
Desulfobacterota 0.0003453677 0.001015241
# Phylum names sorted from highest to lowest mean relative abundance
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(-mean) %>%
  pull(phylum)

# Jitter plot of per-sample relative abundance for every phylum.
# Factor levels are the reversed abundance ranking, so the phylum with the
# highest mean is the last level of the y axis.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  labs(y = "Phylum", x = "Relative abundance") +
  theme_minimal() +
  theme(legend.position = "none")

6.1.3 Phylum percentages by site

Daneborg dogs

tinytable_5cfaiuka1egiedl9qo10
Phylum mean sd
Bacteroidota 47.42647071 12.41051445
Fusobacteriota 26.44829722 8.85569343
Bacillota_A 11.78407839 5.26808838
Pseudomonadota 9.30642610 4.09486152
Bacillota 2.01995216 1.12909691
Bacillota_C 1.82946040 0.79964496
Deferribacterota 0.44065871 0.47755553
Actinomycetota 0.33862019 0.58878812
Campylobacterota 0.21681726 0.42039106
Spirochaetota 0.07390032 0.12704418
Desulfobacterota 0.06907354 0.13605743
Bacillota_B 0.04624502 0.09256777

Ittoqqortoormii dogs

tinytable_vtzstwgmdp4x90xk1i26
Phylum mean sd
Bacteroidota 33.2933180 13.1648465
Fusobacteriota 24.0468437 14.0006093
Bacillota_A 19.6115116 8.9878472
Pseudomonadota 12.7481884 12.0768015
Bacillota 4.4926478 6.4481701
Bacillota_C 3.3336539 4.1302504
Campylobacterota 0.8467694 1.0233386
Actinomycetota 0.7161777 0.7696304
Bacillota_B 0.4225267 0.9387879
Deferribacterota 0.3705454 0.7452951
Spirochaetota 0.1178176 0.3162563

6.2 Taxonomy boxplot

6.2.1 Family

# Relative abundance of each family in each sample (one row per sample-family pair)
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide every non-genome column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  summarise(relabun = sum(count), .groups = "drop") # return an ungrouped table instead of one still grouped by sample

# Mean and standard deviation of relative abundance per family, highest mean first
family_summary %>%
  group_by(family) %>%
  summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
  arrange(-mean) %>%
  tt()
tinytable_9ikedqwrujo22mstfljb
family mean sd
f__Bacteroidaceae 3.967680e-01 1.500812e-01
f__Fusobacteriaceae 2.524757e-01 1.167390e-01
f__Lachnospiraceae 6.937163e-02 5.162657e-02
f__Succinivibrionaceae 4.514358e-02 4.599230e-02
f__Burkholderiaceae_A 3.599256e-02 1.658700e-02
f__Ruminococcaceae 3.000129e-02 2.083510e-02
f__Enterobacteriaceae 2.893626e-02 8.003545e-02
f__Peptostreptococcaceae 2.580817e-02 2.802819e-02
f__Clostridiaceae 1.944158e-02 3.937397e-02
f__Acidaminococcaceae 1.755676e-02 8.366470e-03
f__Selenomonadaceae 8.258810e-03 3.029587e-02
f__Erysipelotrichaceae 6.992056e-03 8.757741e-03
f__Anaeroplasmataceae 6.453126e-03 9.215863e-03
f__Lactobacillaceae 6.211658e-03 3.296719e-02
f__Turicibacteraceae 4.883145e-03 1.619002e-02
f__Coriobacteriaceae 4.215944e-03 6.053805e-03
f__Mucispirillaceae 4.056020e-03 6.214013e-03
f__Helicobacteraceae 3.509401e-03 5.952573e-03
f__Enterococcaceae 3.123536e-03 2.247890e-02
f__Oscillospiraceae 3.086101e-03 5.295318e-03
f__Butyricicoccaceae 2.721653e-03 3.113349e-03
f__Muribaculaceae 2.608256e-03 6.651236e-03
f__Peptococcaceae 2.343858e-03 6.878646e-03
f__CAG-508 2.325013e-03 1.995239e-03
f__Streptococcaceae 2.313326e-03 7.004722e-03
f__Coprobacillaceae 2.183605e-03 3.762140e-03
f__Anaerotignaceae 2.105212e-03 3.582049e-03
f__Tannerellaceae 1.883546e-03 4.440356e-03
f__Campylobacteraceae 1.808532e-03 5.010135e-03
f__Marinifilaceae 1.500172e-03 5.097462e-03
f__Brachyspiraceae 9.585895e-04 2.398975e-03
f__UBA932 8.389653e-04 2.555016e-03
f__CAG-274 8.267501e-04 1.348617e-03
f__Eggerthellaceae 8.159672e-04 1.624225e-03
f__Cellulosilyticaceae 4.732449e-04 1.929303e-03
f__Desulfovibrionaceae 3.453677e-04 1.015241e-03
f__Anaerovoracaceae 3.247569e-04 7.467800e-04
f__CAG-826 2.912959e-04 5.753363e-04
f__Bifidobacteriaceae 2.420788e-04 1.807747e-03
f__Peptoniphilaceae 2.413738e-04 8.768049e-04
f__JAAYXM01 1.716132e-04 6.218848e-04
f__Beijerinckiaceae 1.585673e-04 1.207613e-03
f__Mycoplasmoidaceae 6.465891e-05 3.399451e-04
f__Burkholderiaceae_C 4.210337e-05 2.455972e-04
f__ 3.978316e-05 1.733718e-04
f__Acutalibacteraceae 3.977628e-05 2.255595e-04
f__UBA3375 2.708135e-05 9.892183e-05
f__Catellicoccaceae 1.951115e-05 1.485925e-04
# Family names sorted from highest to lowest relative abundance summed over samples.
# The helper column is named `total` because it holds a sum, not a mean.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total = sum(relabun)) %>%
  arrange(-total) %>%
  pull(family)

# Per region
# Jitter plot of the first 20 families in family_arrange, faceted by region and coloured by phylum
family_summary %>%
  left_join(genome_metadata %>% select(family, phylum) %>% unique(), by = join_by(family)) %>% # attach the phylum of each family
  left_join(sample_metadata, by = join_by(sample)) %>% # re-attach sample metadata (presumably the source of `region`)
  filter(family %in% head(family_arrange, 20)) %>% # head() does not pad with NA when fewer than 20 families exist
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # NOTE(review): [-8] drops one colour by position; confirm which phylum it
  # refers to and whether selecting by name would be safer.
  scale_color_manual(values = phylum_colors[-8]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")

6.2.2 Genus

# Relative abundance of each genus in each sample, keeping the phylum for colouring
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide every non-genome column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  summarise(relabun = sum(count), .groups = "drop") %>% # return an ungrouped table instead of one grouped by sample and phylum
  filter(genus != "g__") %>% # drop rows whose genus label is only the bare "g__" prefix
  mutate(genus = sub("^g__", "", genus)) # strip the rank prefix from the remaining labels

# Mean and standard deviation of relative abundance per genus, highest mean first
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
  arrange(-mean)

# Render the sorted genus summary as a table
genus_summary_sort %>%
  tt()
tinytable_wc7vwnvcyk546eo7hi84
genus mean sd
Phocaeicola 2.276024e-01 1.030967e-01
Fusobacterium_A 1.344294e-01 9.447745e-02
Fusobacterium_B 9.108268e-02 4.487667e-02
Bacteroides 5.732156e-02 3.278054e-02
Alloprevotella 4.766166e-02 5.549928e-02
Anaerobiospirillum 4.414681e-02 4.592350e-02
Mediterranea 3.741307e-02 2.702094e-02
Sutterella 3.103683e-02 1.516295e-02
Escherichia 2.807037e-02 7.932913e-02
Faecalibacterium 2.340600e-02 1.733790e-02
Prevotella 2.018810e-02 4.308974e-02
Phascolarctobacterium_A 1.755676e-02 8.366470e-03
Blautia 1.618063e-02 1.813142e-02
Peptacetobacter 1.394900e-02 1.201021e-02
Sarcina 1.139524e-02 2.964173e-02
Ruminococcus_B 1.063530e-02 1.750319e-02
Faecalimonas 9.226381e-03 1.329828e-02
Cetobacterium_A 8.844355e-03 4.284187e-02
Megamonas 8.258810e-03 3.029587e-02
Blautia_A 7.895308e-03 7.135310e-03
Peptostreptococcus 7.383090e-03 2.411080e-02
CALUXS01 6.453126e-03 9.215863e-03
Schaedlerella 5.844351e-03 8.304399e-03
Paraprevotella 5.729017e-03 1.173181e-02
Turicibacter 4.883145e-03 1.619002e-02
Aphodousia 4.280159e-03 5.412066e-03
Collinsella 4.215944e-03 6.053805e-03
Clostridium 4.215383e-03 8.849574e-03
Eisenbergiella 4.127377e-03 4.150112e-03
Fournierella 3.810018e-03 3.680947e-03
Enterococcus_B 3.058641e-03 2.248437e-02
Mucispirillum 3.010977e-03 5.847022e-03
Lactobacillus 2.930739e-03 1.570378e-02
Ligilactobacillus 2.808046e-03 1.582268e-02
Butyricicoccus 2.721653e-03 3.113349e-03
Allobaculum 2.644001e-03 2.977452e-03
Ventrimonas 2.628617e-03 5.341587e-03
Limisoma 2.608256e-03 6.651236e-03
GCA-900066495 2.535003e-03 7.209322e-03
Enterocloster 2.428325e-03 2.738649e-03
UMGS1590 2.343858e-03 6.878646e-03
CAJMNU01 2.244379e-03 3.136201e-03
Faecousia 2.101022e-03 3.902371e-03
Avimicrobium 1.978521e-03 1.756834e-03
Parabacteroides 1.883546e-03 4.440356e-03
Campylobacter_D 1.808532e-03 5.010135e-03
Helicobacter_A 1.686258e-03 5.099817e-03
Merdicola 1.623972e-03 1.133414e-03
Lachnospira 1.574311e-03 3.698223e-03
Odoribacter 1.500172e-03 5.097462e-03
Clostridium_H 1.301880e-03 4.013538e-03
Lactococcus 1.258562e-03 5.625124e-03
JAHHTG01 1.213503e-03 6.635896e-03
Roseburia 1.209379e-03 5.131904e-03
Anaerotignum 1.107741e-03 1.705435e-03
Mediterraneibacter 1.062241e-03 6.028260e-03
Streptococcus 1.054765e-03 4.437524e-03
Holdemanella 1.006879e-03 2.085413e-03
Romboutsia_C 9.837030e-04 6.124611e-03
Brachyspira 9.585895e-04 2.398975e-03
Anaerobiospirillum_A 9.429832e-04 2.289970e-03
Fimicola 8.751261e-04 3.276954e-03
Klebsiella 8.658896e-04 4.763963e-03
Clostridium_Q 8.577680e-04 1.636674e-03
Phocaeicola_A 8.521991e-04 2.195967e-03
Cryptobacteroides 8.389653e-04 2.555016e-03
Slackia_A 8.159672e-04 1.624225e-03
Hungatella_A 8.124222e-04 1.814413e-03
Dwaynesavagella 8.116830e-04 3.414155e-03
CALVGN01 8.096684e-04 1.171787e-03
Clostridium_J 7.756410e-04 2.354818e-03
Gallispira 7.671477e-04 1.302383e-03
Avilachnospira 7.595761e-04 1.773406e-03
Copromonas 7.399478e-04 1.481977e-03
CAG-269 7.010411e-04 1.856952e-03
Faecalibacillus 6.819025e-04 3.505549e-03
Parasutterella 6.122753e-04 4.662950e-03
Helicobacter_G 6.017605e-04 2.750447e-03
Helicobacter_B 5.971256e-04 2.038090e-03
Faecalitalea 5.711255e-04 1.222346e-03
Clostridium_G 5.351194e-04 1.970231e-03
Helicobacter_C 5.226296e-04 2.160895e-03
Thomasclavelia 5.142818e-04 7.308848e-04
Amedibacterium 4.891434e-04 2.468238e-03
Limosilactobacillus 4.728731e-04 1.797457e-03
Romboutsia 4.692510e-04 1.463961e-03
Catenibacterium 4.606070e-04 1.271267e-03
Negativibacillus 4.556881e-04 1.030846e-03
UBA9414 4.053743e-04 8.124246e-04
Mailhella 3.453677e-04 1.015241e-03
Dysosmobacter 3.408519e-04 9.011948e-04
Gallibacter 3.247569e-04 7.467800e-04
Oliverpabstia 3.231635e-04 4.962950e-04
Hathewaya 3.079613e-04 1.086915e-03
Lawsonibacter 3.006862e-04 9.125541e-04
Onthovivens 2.912959e-04 5.753363e-04
Cellulosilyticum 2.671023e-04 1.579720e-03
Bifidobacterium 2.420788e-04 1.807747e-03
Anaerosphaera 2.413738e-04 8.768049e-04
Fimiplasma 2.408513e-04 6.143729e-04
UMGS1370 2.210713e-04 4.205404e-04
Zhenhengia 2.061426e-04 1.157043e-03
Paraclostridium 2.019028e-04 5.515030e-04
RGIG7332 1.716132e-04 6.218848e-04
Pseudoflavonifractor_A 1.669855e-04 2.623799e-04
Rhodoblastus 1.585673e-04 1.207613e-03
Dielma 1.490374e-04 2.938513e-04
Beduini 1.484189e-04 3.120260e-04
CCUG-7971 1.481056e-04 5.489315e-04
Merdivicinus 1.397210e-04 4.985405e-04
Terrisporobacter 1.381169e-04 6.235356e-04
MGBC140090 1.375437e-04 3.689981e-04
Metalachnospira 1.223447e-04 2.462986e-04
Amedibacillus 1.086979e-04 4.740354e-04
Anaerofilum 1.035896e-04 4.184692e-04
Helicobacter_D 1.016273e-04 5.427803e-04
Clostridium_AH 9.867733e-05 6.883540e-04
Pseudoscilispira 9.795886e-05 1.700112e-04
JAGZHZ01 8.792791e-05 3.004922e-04
Evtepia 7.859597e-05 4.704903e-04
RGIG3102 6.956484e-05 3.106027e-04
Enterococcus 6.489543e-05 3.221736e-04
Mycoplasmoides 6.465891e-05 3.399451e-04
Duodenibacillus 6.329115e-05 1.362160e-04
Succinivibrio 5.378879e-05 1.812214e-04
Acetatifactor 5.089177e-05 1.814556e-04
Paenalcaligenes 4.210337e-05 2.455972e-04
Scybalenecus 3.977628e-05 2.255595e-04
Angelakisella 2.914355e-05 1.019388e-04
Scybalocola 2.788638e-05 1.143383e-04
UBA3375 2.708135e-05 9.892183e-05
Merdisoma 2.284156e-05 8.868611e-05
Catellicoccus 1.951115e-05 1.485925e-04
UBA866 9.037031e-06 5.329642e-05
# Genus names sorted from highest to lowest relative abundance summed over samples.
# genus_summary has already dropped the bare "g__" label and stripped the
# "g__" prefix, so neither step is repeated here.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>%
  arrange(-total) %>%
  pull(genus)

# Per region
# Jitter plot of per-sample relative abundance for every genus, faceted by
# region and coloured by phylum; genera follow the order of genus_summary_sort
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # re-attach sample metadata (presumably the source of `region`)
  mutate(genus = factor(genus, levels = rev(genus_summary_sort %>% pull(genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ region) +
  theme_minimal() +
  labs(y = "Genus", x = "Relative abundance", color = "Phylum") # the y axis shows genera, not families